This week's data comes from the USDA (United States Department of Agriculture). The raw datasets (Excel sheets) can be found at https://www.ers.usda.gov/data-products/dairy-data/documentation/#Loc3
This analysis leverages functions from various libraries, loaded below. The option that disables scientific notation is also set here.
library(tidyverse)
library(scales)
library(dplyr)
library(lubridate)
library(lemon)
library(ggpmisc)
library(ggpubr)
library(devtools)
library(reshape2)
library(plotly)
library(ggplot2)
# Strongly penalise scientific notation so large values print in full.
options(scipen = 999)

# Read the five TidyTuesday (2019-01-29) dairy datasets directly from GitHub.
milkcow_facts <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-29/milkcow_facts.csv"
)
fluid_milk_sales <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-29/fluid_milk_sales.csv"
)
milk_product_facts <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-29/milk_products_facts.csv"
)
clean_cheese <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-29/clean_cheese.csv"
)
state_milk_production <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-29/state_milk_production.csv"
)
# Open each dataset in the spreadsheet-style data viewer for a tabular look
# at its contents.
# The calls are guarded by interactive() so that knitting or sourcing this
# file non-interactively does not attempt to open viewer windows. View() is
# used for every dataset so the calls are consistent.
if (interactive()) {
  View(milkcow_facts)
  View(fluid_milk_sales)
  View(milk_product_facts)
  View(clean_cheese)
  View(state_milk_production)
}
Milkcow Facts - this dataset shows yearly values from 1980-2014 for various facts that help understand the milk industry such as average price of cows and average milk per cow.
Fluid Milk Sales - this dataset shows the sales for different fluid beverage milk products from 1975-2017.
Milk Product Facts - this dataset shows the average yearly consumption in lbs per person for various milk products from 1975-2017.
Clean Cheese - this dataset shows the average yearly consumption in lbs per person for various cheese products from 1970-2017.
State Milk Production - this dataset shows the total milk production values for each state from 1970-2017.
# Per-column summary statistics for the milkcow_facts dataset.
milkcow_facts %>% summary()
## year avg_milk_cow_number milk_per_cow milk_production_lbs
## Min. :1980 Min. : 9010000 Min. :11891 Min. :128406000000
## 1st Qu.:1988 1st Qu.: 9171000 1st Qu.:14254 1st Qu.:144463500000
## Median :1997 Median : 9314000 Median :16871 Median :156091000000
## Mean :1997 Mean : 9695743 Mean :16962 Mean :162563114286
## 3rd Qu.:2006 3rd Qu.:10135000 3rd Qu.:19723 3rd Qu.:179356500000
## Max. :2014 Max. :11059000 Max. :22259 Max. :206054000000
## avg_price_milk dairy_ration milk_feed_price_ratio
## Min. :0.1210 Min. :0.03445 Min. :1.520
## 1st Qu.:0.1275 1st Qu.:0.04550 1st Qu.:2.540
## Median :0.1360 Median :0.04914 Median :2.700
## Mean :0.1462 Mean :0.05784 Mean :2.697
## 3rd Qu.:0.1530 3rd Qu.:0.05886 3rd Qu.:3.030
## Max. :0.2400 Max. :0.12150 Max. :3.640
## milk_cow_cost_per_animal milk_volume_to_buy_cow_in_lbs alfalfa_hay_price
## Min. : 820 Min. : 6560 Min. : 64.64
## 1st Qu.:1100 1st Qu.: 7574 1st Qu.: 79.22
## Median :1190 Median : 8626 Median : 94.03
## Mean :1283 Mean : 8848 Mean :104.59
## 3rd Qu.:1425 3rd Qu.: 9697 3rd Qu.:109.20
## Max. :1950 Max. :13411 Max. :206.08
## slaughter_cow_price
## Min. :0.3300
## 1st Qu.:0.3988
## Median :0.4503
## Mean :0.4875
## 3rd Qu.:0.5147
## Max. :1.0204
# Preview the first rows and the column types of milkcow_facts.
milkcow_facts %>% print()
## # A tibble: 35 x 11
## year avg_milk_cow_nu~ milk_per_cow milk_production~ avg_price_milk
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1980 10799000 11891 128406000000 0.13
## 2 1981 10898000 12183 132770000000 0.138
## 3 1982 11011000 12306 135505000000 0.136
## 4 1983 11059000 12622 139588000000 0.136
## 5 1984 10793000 12541 135351000000 0.135
## 6 1985 10981000 13024 143012000000 0.127
## 7 1986 10773000 13285 143124000000 0.125
## 8 1987 10327000 13819 142709000000 0.125
## 9 1988 10224000 14185 145034000000 0.122
## 10 1989 10046000 14323 143893000000 0.136
## # ... with 25 more rows, and 6 more variables: dairy_ration <dbl>,
## # milk_feed_price_ratio <dbl>, milk_cow_cost_per_animal <dbl>,
## # milk_volume_to_buy_cow_in_lbs <dbl>, alfalfa_hay_price <dbl>,
## # slaughter_cow_price <dbl>
# Per-column summary statistics for the fluid_milk_sales dataset.
fluid_milk_sales %>% summary()
## year milk_type pounds
## Min. :1975 Length:387 Min. : 76000000
## 1st Qu.:1985 Class :character 1st Qu.: 836500000
## Median :1996 Mode :character Median : 3919500000
## Mean :1996 Mean :11960002991
## 3rd Qu.:2007 3rd Qu.:17477450000
## Max. :2017 Max. :55531287319
# Preview the first rows and the column types of fluid_milk_sales.
fluid_milk_sales %>% print()
## # A tibble: 387 x 3
## year milk_type pounds
## <dbl> <chr> <dbl>
## 1 1975 Whole 36188000000
## 2 1976 Whole 35241000000
## 3 1977 Whole 34036000000
## 4 1978 Whole 33235000000
## 5 1979 Whole 32480000000
## 6 1980 Whole 31253000000
## 7 1981 Whole 30397000000
## 8 1982 Whole 29350000000
## 9 1983 Whole 28871000000
## 10 1984 Whole 28204000000
## # ... with 377 more rows
# Per-column summary statistics for the milk_product_facts dataset.
milk_product_facts %>% summary()
## year fluid_milk fluid_yogurt butter
## Min. :1975 Min. :149.0 Min. : 1.968 Min. :4.187
## 1st Qu.:1986 1st Qu.:183.0 1st Qu.: 3.782 1st Qu.:4.368
## Median :1996 Median :205.0 Median : 5.866 Median :4.543
## Mean :1996 Mean :202.9 Mean : 7.163 Mean :4.707
## 3rd Qu.:2006 3rd Qu.:223.5 3rd Qu.:11.310 3rd Qu.:4.909
## Max. :2017 Max. :247.0 Max. :14.930 Max. :5.695
## cheese_american cheese_other cheese_cottage evap_cnd_canned_whole_milk
## Min. : 8.147 Min. : 6.126 Min. :2.071 Min. :0.9401
## 1st Qu.:11.281 1st Qu.:10.677 1st Qu.:2.561 1st Qu.:1.4937
## Median :12.122 Median :15.261 Median :2.655 Median :1.8380
## Mean :11.955 Mean :14.708 Mean :3.129 Mean :2.0359
## 3rd Qu.:12.953 3rd Qu.:18.960 3rd Qu.:4.028 3rd Qu.:2.3380
## Max. :15.056 Max. :22.046 Max. :4.632 Max. :3.9499
## evap_cnd_bulk_whole_milk evap_cnd_bulk_and_can_skim_milk
## Min. :0.4391 Min. :3.018
## 1st Qu.:0.5773 1st Qu.:3.642
## Median :0.6956 Median :4.239
## Mean :0.8117 Mean :4.319
## 3rd Qu.:1.0574 3rd Qu.:5.168
## Max. :1.4619 Max. :5.584
## frozen_ice_cream_regular frozen_ice_cream_reduced_fat frozen_sherbet
## Min. :12.47 Min. :5.673 Min. :0.8029
## 1st Qu.:14.69 1st Qu.:6.080 1st Qu.:1.1098
## Median :15.71 Median :6.327 Median :1.1779
## Mean :15.63 Mean :6.396 Mean :1.1357
## 3rd Qu.:17.06 3rd Qu.:6.613 3rd Qu.:1.2175
## Max. :18.21 Max. :7.552 Max. :1.3645
## frozen_other dry_whole_milk dry_nonfat_milk dry_buttermilk
## Min. :1.349 Min. :0.09496 Min. :2.121 Min. :0.1652
## 1st Qu.:2.274 1st Qu.:0.19626 1st Qu.:2.619 1st Qu.:0.1973
## Median :2.910 Median :0.30000 Median :3.050 Median :0.2000
## Mean :3.127 Mean :0.31371 Mean :3.021 Mean :0.2269
## 3rd Qu.:3.759 3rd Qu.:0.40000 3rd Qu.:3.313 3rd Qu.:0.2452
## Max. :6.540 Max. :0.60000 Max. :4.284 Max. :0.3924
## dry_whey
## Min. :1.890
## 1st Qu.:2.400
## Median :3.022
## Mean :3.045
## 3rd Qu.:3.650
## Max. :4.087
# Preview the first rows and the column types of milk_product_facts.
milk_product_facts %>% print()
## # A tibble: 43 x 18
## year fluid_milk fluid_yogurt butter cheese_american cheese_other
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1975 247 1.97 4.73 8.15 6.13
## 2 1976 247 2.13 4.31 8.88 6.63
## 3 1977 244 2.34 4.29 9.21 6.78
## 4 1978 241 2.45 4.35 9.53 7.31
## 5 1979 238 2.44 4.49 9.60 7.57
## 6 1980 234 2.50 4.47 9.62 7.90
## 7 1981 230 2.44 4.24 10.2 8.03
## 8 1982 224 2.58 4.35 11.3 8.60
## 9 1983 223 3.16 4.91 11.6 8.96
## 10 1984 224 3.55 4.98 11.9 9.62
## # ... with 33 more rows, and 12 more variables: cheese_cottage <dbl>,
## # evap_cnd_canned_whole_milk <dbl>, evap_cnd_bulk_whole_milk <dbl>,
## # evap_cnd_bulk_and_can_skim_milk <dbl>, frozen_ice_cream_regular <dbl>,
## # frozen_ice_cream_reduced_fat <dbl>, frozen_sherbet <dbl>,
## # frozen_other <dbl>, dry_whole_milk <dbl>, dry_nonfat_milk <dbl>,
## # dry_buttermilk <dbl>, dry_whey <dbl>
# Per-column summary statistics for the clean_cheese dataset.
clean_cheese %>% summary()
## Year Cheddar American Other Mozzarella
## Min. :1970 Min. : 5.790 Min. :1.200 Min. : 1.190
## 1st Qu.:1982 1st Qu.: 8.297 1st Qu.:2.220 1st Qu.: 3.183
## Median :1994 Median : 9.515 Median :2.585 Median : 7.695
## Mean :1994 Mean : 8.889 Mean :2.618 Mean : 6.861
## 3rd Qu.:2005 3rd Qu.: 9.920 3rd Qu.:2.965 3rd Qu.: 9.967
## Max. :2017 Max. :11.070 Max. :3.990 Max. :11.730
##
## Italian other Swiss Brick Muenster
## Min. :0.870 Min. :0.880 Min. :0.01000 Min. :0.1700
## 1st Qu.:1.522 1st Qu.:1.067 1st Qu.:0.03000 1st Qu.:0.2775
## Median :2.185 Median :1.170 Median :0.05000 Median :0.3350
## Mean :2.153 Mean :1.155 Mean :0.05396 Mean :0.3402
## 3rd Qu.:2.752 3rd Qu.:1.240 3rd Qu.:0.07250 3rd Qu.:0.4025
## Max. :3.490 Max. :1.350 Max. :0.12000 Max. :0.5300
##
## Cream and Neufchatel Blue Other Dairy Cheese Processed Cheese
## Min. :0.610 Min. :0.1500 Min. :0.410 Min. :3.310
## 1st Qu.:1.100 1st Qu.:0.1600 1st Qu.:0.750 1st Qu.:3.817
## Median :2.050 Median :0.1700 Median :0.965 Median :4.415
## Mean :1.744 Mean :0.1981 Mean :1.018 Mean :4.329
## 3rd Qu.:2.350 3rd Qu.:0.1825 3rd Qu.:1.295 3rd Qu.:4.805
## Max. :2.640 Max. :0.3200 Max. :1.590 Max. :5.440
## NA's :12
## Foods and spreads Total American Chese Total Italian Cheese
## Min. :1.910 Min. : 7.00 Min. : 2.050
## 1st Qu.:2.987 1st Qu.:10.81 1st Qu.: 4.685
## Median :3.220 Median :11.83 Median : 9.945
## Mean :3.164 Mean :11.51 Mean : 9.012
## 3rd Qu.:3.440 3rd Qu.:12.84 3rd Qu.:12.727
## Max. :3.980 Max. :15.06 Max. :15.210
##
## Total Natural Cheese Total Processed Cheese Products
## Min. :11.37 Min. :5.530
## 1st Qu.:19.47 1st Qu.:6.897
## Median :26.29 Median :7.595
## Mean :25.35 Mean :7.492
## 3rd Qu.:31.78 3rd Qu.:8.195
## Max. :37.23 Max. :8.750
##
# Preview the first rows and the column types of clean_cheese.
clean_cheese %>% print()
## # A tibble: 48 x 17
## Year Cheddar `American Other` Mozzarella `Italian other` Swiss Brick
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1970 5.79 1.2 1.19 0.87 0.88 0.1
## 2 1971 5.91 1.42 1.38 0.92 0.94 0.11
## 3 1972 6.01 1.67 1.57 1.02 1.06 0.1
## 4 1973 6.07 1.76 1.76 1.03 1.06 0.11
## 5 1974 6.31 2.16 1.86 1.09 1.18 0.11
## 6 1975 6.04 2.11 2.11 1.12 1.09 0.09
## 7 1976 6.45 2.44 2.31 1.24 1.25 0.09
## 8 1977 6.8 2.42 2.46 1.26 1.21 0.07
## 9 1978 6.94 2.59 2.68 1.37 1.33 0.08
## 10 1979 6.93 2.67 2.8 1.42 1.35 0.06
## # ... with 38 more rows, and 10 more variables: Muenster <dbl>, `Cream and
## # Neufchatel` <dbl>, Blue <dbl>, `Other Dairy Cheese` <dbl>, `Processed
## # Cheese` <dbl>, `Foods and spreads` <dbl>, `Total American Chese` <dbl>,
## # `Total Italian Cheese` <dbl>, `Total Natural Cheese` <dbl>, `Total
## # Processed Cheese Products` <dbl>
# Per-column summary statistics for the state_milk_production dataset.
state_milk_production %>% summary()
## region state year milk_produced
## Length:2400 Length:2400 Min. :1970 Min. : 3000000
## Class :character Class :character 1st Qu.:1982 1st Qu.: 461000000
## Mode :character Mode :character Median :1994 Median : 1347500000
## Mean :1994 Mean : 3131641667
## 3rd Qu.:2005 3rd Qu.: 2744250000
## Max. :2017 Max. :42339000000
# Preview the first rows and the column types of state_milk_production.
state_milk_production %>% print()
## # A tibble: 2,400 x 4
## region state year milk_produced
## <chr> <chr> <dbl> <dbl>
## 1 Northeast Maine 1970 619000000
## 2 Northeast New Hampshire 1970 356000000
## 3 Northeast Vermont 1970 1970000000
## 4 Northeast Massachusetts 1970 658000000
## 5 Northeast Rhode Island 1970 75000000
## 6 Northeast Connecticut 1970 661000000
## 7 Northeast New York 1970 10341000000
## 8 Northeast New Jersey 1970 730000000
## 9 Northeast Pennsylvania 1970 7124000000
## 10 Northeast Delaware 1970 130000000
## # ... with 2,390 more rows
The datasets all appear to be in good condition for analysis, so no preparatory cleaning is needed. Any transformations will be made alongside the visualizations that require them.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## `summarise()` ungrouping output (override with `.groups` argument)
# Histogram (15 bins) of the milk_feed_price_ratio column of milkcow_facts.
ggplot(
  data = milkcow_facts,
  mapping = aes(x = milk_feed_price_ratio)
) +
  geom_histogram(bins = 15, show.legend = TRUE) +
  xlab("Feed Price Ratio") +
  ylab("Count")
# Scatter plot of average herd size against milk yield per cow, with a linear
# trend line fitted over all observations.
# Year is mapped to colour on the point layer only. A continuous colour varies
# within the single group that the smoothing stat fits, so when it is mapped
# globally ggplot2 drops it from the smooth layer (and recent versions warn
# about the dropped aesthetic). Mapping it per layer gives the same picture
# without relying on that behaviour.
milkcow_facts %>%
  ggplot(aes(x = milk_per_cow, y = avg_milk_cow_number)) +
  geom_point(aes(color = year)) +
  geom_smooth(method = "lm") +
  labs(
    x = "Milk Per Cow",
    y = "Average Number of Cows",
    color = "Year"
  ) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Milk yield per cow against the cost per animal, drawn as coloured points
# joined by a line, with a linear trend line (no confidence band) on top.
# Year is mapped to colour on the point and line layers only. A continuous
# colour varies within the single group that the smoothing stat fits, so when
# it is mapped globally ggplot2 drops it from the smooth layer (and recent
# versions warn about the dropped aesthetic).
# NOTE(review): geom_line() joins observations in order of x (milk_per_cow),
# not in year order; if a strictly chronological trace is intended,
# geom_path() may be the better fit. Confirm before changing.
milkcow_facts %>%
  ggplot(aes(x = milk_per_cow, y = milk_cow_cost_per_animal)) +
  geom_point(aes(color = year)) +
  geom_line(aes(color = year)) +
  geom_smooth(se = FALSE, method = "lm") +
  labs(
    title = "Yearly Relationship Between Milk Per Cow and Cost Per Animal",
    x = "Milk Per Cow",
    y = "Average Cost Per Cow",
    color = "Year"
  ) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'